In [ ]:
%run "../Functions/3. Per session and per user analysis.ipynb"
In [ ]:
rmdf1522.head()
In [ ]:
# Tutorial 'reach' events of one known test session.
testSessionId = "fab3ea03-6ff1-483f-a90a-74ff47d0b556"

# Start from 'reach' events, keeping only the per-session columns.
reachRows = rmdf1522.loc[rmdf1522['type'] == 'reach', perSessionRelevantColumns]

# Both conditions are evaluated on the same rows and combined in one mask.
inTestSession = reachRows['sessionId'] == testSessionId
inTutorial = reachRows['section'].str.startswith('tutorial', na=False)
perSession = reachRows[inTestSession & inTutorial]
perSession
In [ ]:
allSessions = getAllSessions( rmdf1522, True )
allSessions.head()
In [ ]:
allSessions[allSessions['sessionId']==testSessionId]
In [ ]:
allSessions[allSessions['userId']=='e2f8d5e4-cccd-4d1a-909b-c9c92f6b83c1']
In [ ]:
# English-speaking user who answered the questionnaire - cf 'Google form analysis.ipynb'.
localplayerguid = '8d352896-a3f1-471c-8439-0f426df901c1'
#localplayerguid = '7037c5b2-c286-498e-9784-9a061c778609'
#localplayerguid = '5c4939b5-425b-4d19-b5d2-0384a515539e'
#localplayerguid = '7825d421-d668-4481-898a-46b51efe40f0'
#localplayerguid = 'acb9c989-b4a6-4c4d-81cc-6b5783ec71d8'
localplayerguid
In [ ]:
perUserRelevantColumns = ['sessionId', 'serverTime', 'section']
In [ ]:
sessionsList = getAllSessionsOfUser(rmdf1522, localplayerguid, True)
sessionsList
In [ ]:
# Tutorial 'reach' events belonging to any session of the selected user.
userReachRows = rmdf1522.loc[rmdf1522['type'] == 'reach', perUserRelevantColumns]
ofUserSessions = userReachRows['sessionId'].isin(sessionsList['sessionId'])
inTutorial = userReachRows['section'].str.startswith('tutorial', na=False)
perUser = userReachRows[ofUserSessions & inTutorial]
# Only the last expression of a cell is displayed; describe() is evaluated
# but its output is not shown.
perUser.describe()
perUser.head()
In [ ]:
#sectionsList = perSession
sectionsList = perUser
In [ ]:
testUser = getRandomGFormGUID()
testSession = getRandomSessionGUID( _userId = testUser )
In [ ]:
timedSections1 = getCheckpointsTimes(testSession)
timedSections1
In [ ]:
# Inlined body of getCheckpointsTimes( sessionId, _rmDF = rmdf1522 ):
# for one session, the first server time at which each tutorial checkpoint
# was reached, and the delay since the previous checkpoint of the index.
sessionId = testSession
_rmDF = rmdf1522
testCounter = 0

# Sentinel values used for checkpoints that were never reached / completed.
neverReached = pd.Timestamp(0, tz='utc')
neverCompleted = pd.Timedelta.max

reachEvents = _rmDF.loc[_rmDF['type'] == 'reach', perSessionRelevantColumns]
perSession = reachEvents[reachEvents['sessionId'] == sessionId]
perSession = perSession[perSession['section'].str.startswith('tutorial', na=False)]

timedSections = pd.DataFrame(data=0, columns=timedSectionsReachedColumns, index=timedSectionsIndex)
timedSections['firstReached'] = neverReached
timedSections['firstCompletionDuration'] = neverCompleted

if len(perSession) > 0:
    # Earliest server time per checkpoint, aligned on the checkpoint index.
    timedSections["firstReached"] = perSession.groupby("section").agg({ "serverTime": np.min })
    # Duration of a checkpoint = time elapsed since the previous row.
    timedSections["firstCompletionDuration"] = timedSections["firstReached"].diff()
    firstCheckpoint = "tutorial1.Checkpoint00"
    if timedSections.loc[firstCheckpoint, "firstReached"] != neverReached:
        # The first checkpoint has no predecessor: its duration is zero.
        timedSections.loc[firstCheckpoint, "firstCompletionDuration"] = pd.Timedelta(0)

# Replace missing values (checkpoints without events) by the sentinels.
timedSections["firstReached"] = timedSections["firstReached"].fillna(neverReached)
timedSections["firstCompletionDuration"] = timedSections["firstCompletionDuration"].fillna(neverCompleted)
timedSections
In [ ]:
len(timedSections)
In [ ]:
# Look up the completion duration of one checkpoint, if it is indexed.
chapter = "tutorial1.Checkpoint01"
time = ''
if chapter in timedSections.index:
    time = timedSections.loc[chapter, "firstCompletionDuration"]
else:
    print("no timed sections")
time
In [ ]:
timedSections1 == timedSections
In [ ]:
reachEvents.iloc[0,0]
In [ ]:
#'7412a447-8177-48e9-82c5-cb31032f76a9': didn't answer
testUser = getRandomGFormGUID()
testResult = getUserDataVector(testUser)
print(testUser)
testResult
In [ ]:
testResult[testUser]['death']
In [ ]:
testResult = getUserDataVector('e2f8d5e4-cccd-4d1a-909b-c9c92f6b83c1')
testResult
In [ ]:
testResult = getUserDataVector('8d352896-a3f1-471c-8439-0f426df901c1')
testResult
In [ ]:
# Scratch check: build a Series from a growing Python list...
gformNotEnough = list()
print(gformNotEnough)
gformNotEnough += [5]
print(gformNotEnough)
gformNotEnough = pd.Series(gformNotEnough)
print(gformNotEnough)

# ...and the same experiment starting from an empty numpy array.
gformNotEnough = np.array([])
print(gformNotEnough)
gformNotEnough = np.append(gformNotEnough, 5)
print(gformNotEnough)
gformNotEnough = pd.Series(gformNotEnough)
print(gformNotEnough)

# Single-checkpoint Series used as a sample of non-validated checkpoints.
testNonVal = pd.Series(['tutorial1.Checkpoint13'])
In [ ]:
# Inlined body of getUserCheckpoints( userId, _rmDF = rmdf1522 ):
# the unique tutorial checkpoints reached by a user over all their sessions.
userId = getRandomRedMetricsGUID()
_rmDF = rmdf1522

# Sessions associated with the user.
sessionsList = getAllSessionsOfUser( _rmDF, userId, True )
userSessionIds = sessionsList['sessionId'].values

# 'reach' events of those sessions, restricted to tutorial sections.
reachEvents = _rmDF.loc[_rmDF['type'] == 'reach', perSessionRelevantColumns]
perUser = reachEvents[reachEvents['sessionId'].isin(userSessionIds)]
perUser = perUser[perUser['section'].str.startswith('tutorial', na=False)]

uniqueSections = perUser['section'].unique()
pd.Series(uniqueSections)
In [ ]:
# Show the three checkpoint series for the current user before testing the
# discrepancy computation on a fixed user id.
gformNonVal = getNonValidatedCheckpoints(userId)
gformVal = getValidatedCheckpoints(userId)
gameVal = getUserCheckpoints(userId)
print(str(gformNonVal))
print()
print(str(gformVal))
print()
print(str(gameVal))

# Inlined body of getDiscrepancyGameGForm( userId ).
# NOTE(review): this id literally contains double quotes; if that is a paste
# artifact, hasAnswered() will always take the "has not answered" branch.
userId = '"72002481-18a1-4de2-8749-553bbabe119e"'
if hasAnswered(userId):
    gformNonVal = getNonValidatedCheckpoints(userId)
    gformVal = getValidatedCheckpoints(userId)
    gameVal = getUserCheckpoints(userId)

    # user has answered questions whose answer they haven't seen in the game:
    # sorted, unique values of gformVal that are not in gameVal
    gameNotEnough = pd.Series(np.setdiff1d(gformVal.values, gameVal.values))

    # user has not answered questions whose answer they have seen in the game
    # '' sorts before every checkpoint name, so a user without any game
    # checkpoint has seen nothing.
    maxGameVal = ''
    if gameVal.values.size != 0:
        # The result of max() was previously discarded, leaving maxGameVal
        # at '' and making the comparison below always true.
        maxGameVal = gameVal.values.max()
    # "Seen in the game" means at or before the furthest checkpoint reached.
    gformNotEnough = pd.Series(
        [nonVal for nonVal in gformNonVal.values if nonVal <= maxGameVal],
        dtype=object,
    )
    result = (gameNotEnough, gformNotEnough)
else:
    result = ([], [])
result
In [ ]:
randomguid = getRandomRedMetricsGUID()
randomguid
In [ ]:
gformNonVal = getNonValidatedCheckpoints(randomguid)
gformNonVal
In [ ]:
gformVal = getValidatedCheckpoints(randomguid)
gformVal
In [ ]:
gameVal = getUserCheckpoints( randomguid )
gameVal
In [ ]:
# user has not answered questions whose answer they have seen in the game
# '' sorts before every checkpoint name, so a user without any game
# checkpoint has seen nothing.
maxGameVal = ''
if gameVal.values.size != 0:
    # The result of max() was previously discarded, so maxGameVal never
    # left its initial value and every non-validated checkpoint was kept.
    maxGameVal = gameVal.values.max()
# "Seen in the game" means at or before the furthest checkpoint reached.
gformNotEnough = pd.Series(
    [nonVal for nonVal in gformNonVal.values if nonVal <= maxGameVal],
    dtype=object,
)
getDiscrepancyGameGForm( randomguid )
In [ ]:
test = getValidatedCheckpoints(localplayerguid)
test
maxValue = ''
if (len(test) > 0):
maxValue = test.values.max()
maxValue
getNonValidatedCheckpoints(localplayerguid)
testlocalplayerguid = '7412a447-8177-48e9-82c5-cb31032f76a9'
test = pd.DataFrame({
'section' : ['tutorial1.Checkpoint00', 'tutorial1.Checkpoint01', 'tutorial1.Checkpoint02'],
'serverTime' : ['0', '1', '2'],
'firstReached' : ['0', '1', '2'],
'firstCompletionDuration' : ['0', '1', '2'],
})
test
#pd.DataFrame({ 'A' : 1.,
# 'B' : pd.Timestamp('20130102'),
# 'C' : pd.Series(1,index=list(range(4)),dtype='float32'),
# 'D' : np.array([3] * 4,dtype='int32'),
# 'E' : pd.Categorical(["test","train","test","train"]),
# 'F' : 'foo' })
In [ ]:
# Inlined body of
# getCheckpointsTimesUser( _userId, _sessionsList = [], _rmDF = rmdf1522 ):
# a user's checkpoints, the first server time at which each was reached over
# all their sessions, and the completion time of each checkpoint.

# incomplete game
#_userId = '958a0e85-1634-4559-bce6-d6af28b7e649'
_userId = 'dfe8f036-8641-4d6c-8411-8a8346bb0402'
#_userId = getRandomRedMetricsGUID()
_sessionsList = []
_rmDF = rmdf1522

# List of associated sessions
if len(_sessionsList) == 0:
    _sessionsList = getAllSessionsOfUser( _rmDF, _userId, True )

# Sentinel for "never reached"; it is timezone-aware, so every comparison
# below must use this exact value (a naive pd.Timestamp(0) never matches).
_neverReached = pd.Timestamp(0, tz='utc')

_timedSections = pd.DataFrame(data=0, columns=timedSectionsReachedColumns, index=timedSectionsIndex)
_timedSections["firstReached"] = _neverReached
_timedSections["firstCompletionDuration"] = pd.Timedelta.max

# First pass: call getCheckpointsTimes on all sessions associated with the
# user, then merge by keeping the oldest time each checkpoint was reached.
for _sessionId in _sessionsList['sessionId']:
    _thisSessionTimes = getCheckpointsTimes( _sessionId )
    for _checkpointName in _thisSessionTimes.index:
        _candidate = _thisSessionTimes.loc[_checkpointName, 'firstReached']
        _current = _timedSections.loc[_checkpointName, 'firstReached']
        if (_candidate != _neverReached) and ((_current == _neverReached) or (_current > _candidate)):
            _timedSections.loc[_checkpointName, 'firstReached'] = _candidate
            _timedSections.loc[_checkpointName, 'firstCompletionDuration'] = \
                _thisSessionTimes.loc[_checkpointName, 'firstCompletionDuration']

print('second pass')
# Second pass: recompute each duration from the merged 'firstReached' times,
# since consecutive checkpoints may come from different sessions.
# This pass previously read and wrote `timedSections` / `thisSessionTimes`
# (no underscore), i.e. leftovers from other cells, not the merged frame.
_previous = ''
for _checkpointName in _timedSections.index:
    if _checkpointName != "tutorial1.Checkpoint00":
        _previousReached = _timedSections.loc[_previous, 'firstReached']
        _thisReached = _timedSections.loc[_checkpointName, 'firstReached']
        if (_previousReached != _neverReached) and (_thisReached != _neverReached):
            _timedSections.loc[_checkpointName, 'firstCompletionDuration'] = _thisReached - _previousReached
    _previous = _checkpointName
_timedSections
In [ ]:
# Events of a random session of a fixed user.
testUser = "3fe0632f-b218-41c3-adfd-27083f271c19"
testSession = getRandomSessionGUID( _userId = testUser )
# Filter on the session drawn just above; `sessionId` is a leftover from an
# earlier cell and may belong to another user.
_rmDF[_rmDF['sessionId'] == testSession]
In [ ]:
# For every known user, pick one random session and report it when its
# tutorial events are spread over more than one daily bin.
length = 1
# Drop missing ids: NaN cannot be filtered reliably by list membership, so
# it is tested with pd.isnull; the string placeholders are tested by value.
allUserIds = [
    i for i in rmdf1522['userId'].unique()
    if not pd.isnull(i) and i not in ('nan', 'null')
]
for user in allUserIds:
    testUser = user #getRandomGFormGUID()
    testSession = getRandomSessionGUID( _userId = testUser )
    #testUser = '8172f20e-c29b-4fda-9245-61ab05a84792'
    if testSession != '':
        sessionId = testSession
        _rmDF = rmdf1522
        # Returns a given session's total playtime and day count
        #def getPlayedTimeSession( sessionId, _rmDF = rmdf1522 ):
        sessionEvents = _rmDF[_rmDF['sessionId']==sessionId]
        sessionTimesTutorial = sessionEvents[sessionEvents['section'].str.startswith('tutorial', na=False)]['userTime']
        # Index the times by themselves so that they can be grouped by day.
        sessionTimesTutorial.index = sessionTimesTutorial.values
        if len(sessionTimesTutorial) > 0:
            # pd.Grouper(freq='D') replaces the removed pd.TimeGrouper('D');
            # list aggregation + rename replaces the removed dict-renaming
            # form of Series.agg.
            sessionTimesTutorial = sessionTimesTutorial\
                .groupby(pd.Grouper(freq='D'))\
                .agg(['min', 'max'])\
                .rename(columns={'min': 'start', 'max': 'end'})
            length = len(sessionTimesTutorial.index)
        else:
            # No tutorial event in this session: nothing to group.
            length = 0
        if length > 1:
            print("user = " + str(testUser) + " session = " + str(testSession) + " length = " + str(length))
In [ ]:
# checks
#usersWithSeveralSessions = []
#for userId in allUserIds:
# count = countSessions(userId, False, [], rmdf1522)
# if(count > 3):
# usersWithSeveralSessions.append(userId)
#print("userId="+str(userId)+" : " + str(count))
#rmdf1522[rmdf1522['userId']=='57e2b6b7-c308-4492-9228-f753d5b3044c']['customData.platform'].unique()
#rmdf1522[rmdf1522['userId']=='57e2b6b7-c308-4492-9228-f753d5b3044c']
#userId = 'deb089c0-9be3-4b75-9b27-28963c77b10c'
#for userId in usersWithSeveralSessions:
# print(str(userId)+" :")
# for sessionId in getAllSessionsOfUser(rmdf1522, userId)['sessionId']:
# print(str(sessionId)+" : " + str(getPlayedTimeSession(sessionId)))
# print()
In [ ]:
# Inlined body of getPlayedTimeSessionMode(sessionEvents, mode):
# days spent and total time played in one game mode during one session.
testSession = "7ea5d49a-14f3-40b8-b9c4-d3d52eb0c4e1" #4
#sessionEvents = pd.DataFrame(columns=_rmDF.columns)
sessionEvents = rmdf1522[rmdf1522['sessionId']==testSession]
mode = 'tutorial'

# User times of the events whose section belongs to the selected mode,
# indexed by themselves so that they can be grouped by calendar day.
sessionTimes = sessionEvents[sessionEvents['section'].str.startswith(mode, na=False)]['userTime']
sessionTimes.index = sessionTimes.values

daysSpent = set()
totalSpentTime = pd.Timedelta(0)
if len(sessionTimes) > 0:
    # pd.Grouper(freq='D') replaces the removed pd.TimeGrouper('D'); list
    # aggregation + rename replaces the removed dict-renaming Series.agg.
    sessionTimes = sessionTimes\
        .groupby(pd.Grouper(freq='D'))\
        .agg(['min', 'max'])\
        .rename(columns={'min': 'start', 'max': 'end'})
    # NOTE(review): the daily grouper may also emit bins for days without
    # any event between the first and the last one; those days would be
    # counted in daysSpent - confirm this is intended.
    daysSpent = set(sessionTimes.index)
    # Time played on a day = span between its first and last event.
    sessionTimes['played'] = sessionTimes['end'] - sessionTimes['start']
    totalSpentTime = sessionTimes['played'].sum()
{'daysSpent': daysSpent, 'totalSpentTime': totalSpentTime}
In [ ]:
getPlayedTimeSessionMode(sessionEvents, 'tutorial')
In [ ]:
getPlayedTimeSessionMode(pd.DataFrame(columns=_rmDF.columns), 'tutorial')
In [ ]:
# Inlined body of getPlayedTimeSession( sessionId, _rmDF = rmdf1522 ):
# a session's played time and days, per game mode.
# Alternative session kept for reference:
#testSession = "1d16f3f2-2f76-49ee-bb37-9742ed54287a" #5 + NaT
testSession = "7ea5d49a-14f3-40b8-b9c4-d3d52eb0c4e1" #4
sessionId = testSession
_rmDF = rmdf1522

# All events of the session, then one played-time summary per game mode.
sessionEvents = _rmDF[_rmDF['sessionId'] == sessionId]
playedTimeByMode = {
    gameMode: getPlayedTimeSessionMode(sessionEvents, gameMode)
    for gameMode in ('tutorial', 'sandbox')
}
playedTimeByMode
In [ ]:
getPlayedTimeSession('', _rmDF = _rmDF)
In [ ]:
a = getPlayedTimeSession("054a96ca-c2f1-4967-9b77-6ce4c33c9d33")
b = getPlayedTimeSession("e5421d6c-2f55-4279-8d82-bbafbe16d635")
a,b
In [ ]:
# Hand-written played-time fixtures with the same layout as the dictionaries
# merged in the next cell: per game mode, a set of days and a total duration.
# The days are built with pd.date_range (both bounds included) rather than
# pd.Timestamp(..., freq='D'), whose `freq` argument is no longer accepted by
# current pandas; timestamps compare equal regardless of frequency.
c = {
    'sandbox': {
        'daysSpent': set(pd.date_range('2017-06-07', '2017-06-11', freq='D')),
        'totalSpentTime': pd.Timedelta('0 days 00:09:34.662000'),
    },
    'tutorial': {
        'daysSpent': set(pd.date_range('2017-06-07', '2017-06-12', freq='D')),
        'totalSpentTime': pd.Timedelta('0 days 00:00:11.007000'),
    },
}
d = {
    'sandbox': {
        'daysSpent': set(pd.date_range('2017-06-06', '2017-06-10', freq='D')),
        'totalSpentTime': pd.Timedelta('0 days 00:09:34.662000'),
    },
    'tutorial': {
        'daysSpent': set(pd.date_range('2017-06-05', '2017-06-10', freq='D')),
        'totalSpentTime': pd.Timedelta('0 days 00:00:11.007000'),
    },
}
# Union of the tutorial days of both fixtures.
c['tutorial']['daysSpent'] | d['tutorial']['daysSpent']
In [ ]:
# Inlined body of mergePlayedTimes(a, b): per game mode, add the played
# times and take the union of the played days.
#a = getPlayedTimeSession("054a96ca-c2f1-4967-9b77-6ce4c33c9d33")
#b = getPlayedTimeSession("e5421d6c-2f55-4279-8d82-bbafbe16d635")
a = c
b = d

# The union of two sets is already free of duplicates. Wrapping it in
# np.unique produced a one-element object array containing the whole set,
# which breaks both len(daysSpent) and any further `|` merge.
result = {
    gameMode: {
        'totalSpentTime': a[gameMode]['totalSpentTime'] + b[gameMode]['totalSpentTime'],
        'daysSpent': a[gameMode]['daysSpent'] | b[gameMode]['daysSpent'],
    }
    for gameMode in a
}
result
In [ ]:
# Inlined body of getPlayedTimeUser( userId, _sessionsList = [], _rmDF = rmdf1522 ):
# a user's total played time and days, merged over all their sessions.
#userId = 'ae72a4cb-244e-475c-80ea-11a410266645'
userId = '6bc0f58c-26ed-4be9-9596-2a9ad8d11d67'
_sessionsList = []
_rmDF = rmdf1522

# Fall back on all the sessions of the user when none are provided.
if len(_sessionsList) == 0:
    _sessionsList = getAllSessionsOfUser(_rmDF, userId)

# Start from the played time of the '' session, then fold in each session.
result = getPlayedTimeSession('', _rmDF = _rmDF)
for session in _sessionsList['sessionId']:
    result = mergePlayedTimes(result, getPlayedTimeSession(session, _rmDF))
result
In [ ]:
# Inlined body of getDeaths( sessionId, _rmDF = rmdf1522 ):
# death count per tutorial checkpoint for one session.
sessionId = "fab3ea03-6ff1-483f-a90a-74ff47d0b556"
_rmDF = rmdf1522

# 'death' events with the per-session columns only.
deathEvents = _rmDF.loc[_rmDF['type'] == 'death', perSessionRelevantColumns]

# Keep the deaths of this session that happened in a tutorial section.
ofSession = deathEvents['sessionId'] == sessionId
inTutorial = deathEvents['section'].str.startswith('tutorial', na=False)
perSession = deathEvents[ofSession & inTutorial]

# One row per section with its number of deaths.
deathsPerSection = perSession.groupby("section").size()
deathsSections = deathsPerSection.reset_index(name='deathsCount')
deathsSections
In [ ]:
# Inlined body of getDeathsUser( userId, _rmDF = rmdf1522 ):
# death count per checkpoint for a user, summed over all their sessions.
userId = 'ae72a4cb-244e-475c-80ea-11a410266645'
_rmDF = rmdf1522

# List of associated sessions
sessionsList = getAllSessionsOfUser( _rmDF, userId, True )

# Call getDeaths on all sessions associated with user, then merge by adding
deathsSections = pd.DataFrame(0, columns=timedSectionsDeathsColumns, index=timedSectionsIndex)
for sessionId in sessionsList['sessionId']:
    deaths = getDeaths( sessionId )
    for checkpointName, sessionDeathsCount in zip(deaths['section'], deaths['deathsCount']):
        # Single .loc access: the chained form df['deathsCount'][name] = ...
        # may write to a temporary copy and leave deathsSections unchanged.
        deathsSections.loc[checkpointName, 'deathsCount'] += sessionDeathsCount
deathsSections
In [ ]:
# Inlined body of
# getUserCraftEventsTotal( eventCode, userId, sessionsList=[], _rmDF = rmdf1522 ):
# number of craft events of one kind over all the sessions of a user.
# craftEventCodes = list(["equip","unequip","add","remove"])
eventCode = 'equip'
userId = getRandomRedMetricsGUID()
sessionsList=[]
_rmDF = rmdf1522

# Fall back on all the sessions of the user when none are provided.
if len(sessionsList) == 0:
    sessionsList = getAllSessionsOfUser( _rmDF, userId, True )

result = 0
if eventCode not in craftEventCodes:
    print("incorrect event code '" + eventCode + "'")
else:
    # Event type and payload column associated with this craft event code.
    eventType = craftEventsColumns['eventType'][eventCode]
    payloadColumn = craftEventsColumns['column'][eventCode]
    events = _rmDF[_rmDF['type'] == eventType]
    # Only events that actually carry a value in the payload column count.
    events = events[events[payloadColumn].notnull()]
    perSession = events[events['sessionId'].isin(sessionsList['sessionId'])]
    result = len(perSession)
result, userId
In [ ]:
In [ ]:
# Inlined body of
# getUserEventsTotal( eventType, userId, sessionsList=[], _rmDF = rmdf1522 ):
# number of events of one type over all the sessions of a user.
eventType = 'death'
#userId = 'e2f8d5e4-cccd-4d1a-909b-c9c92f6b83c1'
userId = getRandomRedMetricsGUID()
sessionsList=[]
_rmDF = rmdf1522

# Fall back on all the sessions of the user when none are provided.
if len(sessionsList) == 0:
    sessionsList = getAllSessionsOfUser( _rmDF, userId, True )

# Events of the requested type, then only those of the user's sessions.
sessionEvents = _rmDF[_rmDF['type'] == eventType]
ofUserSessions = sessionEvents['sessionId'].isin(sessionsList['sessionId'])
perSession = sessionEvents[ofUserSessions]
len(perSession)
In [ ]:
userId = getSurveysOfBiologists(gform)[localplayerguidkey].iloc[2]
#sample = gform[gform[localplayerguidkey] == userId]
In [ ]:
_rmDF[_rmDF['sessionId'] == _sessionId]['type'].value_counts()
In [ ]:
# Inlined body of getSessionDataPreview( _sessionId, _rmDF ),
# for per-session, manual analysis: first and last user time, platform and
# event counts of the first session of the current user.
_rmDF = rmdf1522
sessions = getAllSessionsOfUser( _rmDF, userId, True )
_sessionId = sessions['sessionId'].iloc[0]

_logs = _rmDF[_rmDF['sessionId'] == _sessionId]

# User times in chronological order.
_timedEvents = _logs['userTime'].sort_values()

# First non-null platform value, or '' when the logs carry none.
_platformValues = _logs['customData.platform'].dropna().values
_platform = _platformValues[0] if len(_platformValues) > 0 else ''

# Number of events per event type.
_events = _logs['type'].value_counts()

result = {
    'first' : _timedEvents.iloc[0],
    'last' : _timedEvents.iloc[-1],
    'platform' : _platform,
    'events' : _events
}
print(result)
In [ ]:
# Unpack by key: the dictionary was built in the order first, last, platform,
# events, so positional unpacking of .values() into (events, first, last,
# platform) bound every name to the wrong value.
first, last, platform, events = (result[key] for key in ('first', 'last', 'platform', 'events'))
In [ ]:
first, last, platform, events
In [ ]:
userId = getSurveysOfBiologists(gform)[localplayerguidkey].iloc[2]
#sample = gform[gform[localplayerguidkey] == userId]
In [ ]:
events, first, last, platform
In [ ]:
events
In [ ]:
sdp = getSessionDataPreview(_sessionId, _rmDF = _rmDF)
In [ ]:
sdp
In [ ]:
# Inlined body of getUserDataPreview( userId, _rmDF = rmdf1522 ),
# for per-user, manual analysis: a one-column frame (column = user id)
# gathering RedMetrics session data and Google form data.
#userId = getRandomGFormGUID()
_rmDF = rmdf1522
scoreLabel = 'score'

result = pd.DataFrame(columns = [userId])

# [ ] RM
result.loc['REDMETRICS ANALYSIS'] = ' '

# [ ] sessions count
sessions = getAllSessionsOfUser( _rmDF, userId, True )
result.loc['sessions', userId] = len(sessions)

# [ ] first event date
result.loc['firstEvent', userId] = getFirstEventDate( userId )

# [ ] time played
# [ ] dates played
# [ ] first played, last played
sessionIds = sessions['sessionId']
for _sessionIdIndex, _sessionId in enumerate(sessionIds):
    sdp = getSessionDataPreview(_sessionId, _rmDF = _rmDF)
    # One group of rows per session, prefixed by its position in the list.
    _rowPrefix = 'session' + str(_sessionIdIndex)
    result.loc[_rowPrefix + ' platform',userId] = sdp['platform']
    result.loc[_rowPrefix + ' first',userId] = sdp['first']
    result.loc[_rowPrefix + ' last',userId] = sdp['last']
    result.loc[_rowPrefix + ' events',userId] = str(sdp['events'])

# [ ] best chapter
# [ ] counts of events: deaths, crafts,...

# [ ] GF
result.loc['GFORM ANALYSIS'] = ' '

# [ ] score(s)
score = getScore( userId )
for _temporality in score.columns:
    _score = score.loc[scoreLabel,_temporality]
    if len(_score) == 0:
        _score = np.nan
    elif _temporality == answerTemporalities[0]:
        # first temporality: keep the last score
        _score = _score[len(_score)-1]
    else:
        # other temporalities: keep the first score
        _score = _score[0]
    result.loc[scoreLabel+_temporality,userId] = _score

# [ ] progression
# [ ] demographics
result.loc[scoreLabel+'s',userId] = str(score.values)
gfDataPreview = getGFormDataPreview(userId, gform)
features = {1: 'date', 2: 'temporality RM', 3: 'temporality GF', 4: 'score', 5: 'genderAge'}
for key in gfDataPreview:
    _preview = gfDataPreview[key]
    for _featureName in features.values():
        result.loc[key + ' ' + _featureName] = str(_preview[_featureName])
    for index, match in enumerate(_preview['demographic matches']):
        result.loc[key + ' demographic match ' + str(index)] = repr(match)
result
In [ ]:
answerTemporalities
In [ ]:
#getUserDataPreview(undefinedId)
In [ ]:
for undefinedId in gform[gform[QTemporality] == answerTemporalities[2]][localplayerguidkey]:
getUserDataPreview(undefinedId)
In [ ]:
rdfcrafttest = pd.read_csv("../../data/2017-10-10.craft-test.csv")
rdfcrafttest = getNormalizedRedMetricsCSV(rdfcrafttest)
rdfcrafttest
In [ ]:
craftEventsColumns
In [ ]:
craftEventsColumns['column']['equip']
In [ ]:
type(craftEventCodes)
In [ ]:
test = np.unique(np.concatenate((perSessionRelevantColumns, [craftEventsColumns['column']['equip']])))
test
In [ ]:
# user 344 adds
#'e2f8d5e4-cccd-4d1a-909b-c9c92f6b83c1'
# one of its sessions
# fab3ea03-6ff1-483f-a90a-74ff47d0b556
#
# user 22 adds
#'8d352896-a3f1-471c-8439-0f426df901c1'
#
# session test
craftSessionTest = getSectionsCraftEvents('equip', "fab3ea03-6ff1-483f-a90a-74ff47d0b556")
# user test
craftUserTest = getUserSectionsCraftEvents('equip', 'e2f8d5e4-cccd-4d1a-909b-c9c92f6b83c1')
# user count test
craftUserTestCount = getUserSectionsCraftEventsTotal('equip', 'e2f8d5e4-cccd-4d1a-909b-c9c92f6b83c1')
craftUserTestCount
In [ ]:
print("craftSessionTest=" + str(craftSessionTest))
print("craftUserTest=" + str(craftUserTest))
print("craftUserTestCount=" + str(craftUserTestCount))
In [ ]:
columnName = craftEventsColumns['column']['equip']
columnName
In [ ]:
# Every non-null entry of the selected column, in row order.
result = [entry for entry in rmdf1522[columnName] if not pd.isnull(entry)]
result
In [ ]:
#rmdf1522[columnName].notnull()
In [ ]:
sectionsEvents = pd.DataFrame(0, columns=eventSectionsCountColumns, index=range(0))
sectionsEvents
In [ ]:
#events = rmdf1522[rmdf1522['type']==eventType and not rmdf1522[craftEventsColumns['column'][eventCode]].isnull()].loc[:,perSessionRelevantColumns]
In [ ]:
In [ ]:
testUser = getRandomGFormGUID()
print(testUser)
#testResult = getUserDataVector(testUser)
#testResult
In [ ]:
# Inlined body of getUserDataVector( userId, _source = [], _rmDF = rmdf1522 ):
# a one-column frame (column = user id) gathering scores, event counts,
# chapter times and derived indicators for one user.
userId = getRandomGFormGUID()
#userId = '1f27519a-971f-4e39-bac7-9920bfc4b05b' #undefined temporality
#userId = 'e2f8d5e4-cccd-4d1a-909b-c9c92f6b83c1' #has not answered
print(userId)
_source = correctAnswers
_rmDF = rmdf1522

sessionsList = getAllSessionsOfUser( _rmDF, userId, True )

columnName = str(userId)
data = pd.DataFrame(0, columns=[columnName],index=userDataVectorIndex)

# One score per temporality: the last one for the first temporality,
# the first one for the others, NaN when there is none.
# `scoreLabel` is expected to be defined by an earlier cell.
score = getScore( userId )
for _temporality in score.columns:
    _score = score.loc[scoreLabel,_temporality]
    if len(_score) > 0:
        if _temporality == answerTemporalities[0]:
            _score = _score[len(_score)-1]
        else:
            _score = _score[0]
    else:
        _score = np.nan
    data.loc[scoreLabel+_temporality,columnName] = _score

data.loc['sessionsCount',columnName] = countSessions( userId, False, sessionsList, _rmDF = _rmDF)

# Event counts; craft events are counted through their dedicated function.
for eventName in simpleEvents:
    if eventName in craftEventCodes:
        data.loc[eventName,columnName] = getUserCraftEventsTotal(eventName, userId, sessionsList)
    else:
        data.loc[eventName,columnName] = getUserEventsTotal(eventName, userId, sessionsList)

# Highest checkpoint number reached: the last two characters of the greatest
# checkpoint name, with Checkpoint00 as a floor for users without any.
# pd.concat replaces Series.append, which is no longer available in pandas.
reachedCheckpoints = pd.concat([
    pd.Series(['tutorial1.Checkpoint00']),
    getUserCheckpoints(userId, _rmDF = _rmDF),
])
data.loc['maxChapter', columnName] = int(reachedCheckpoints.max()[-2:])

# time spent on each chapter
times = getCheckpointsTimesUser(userId)
completionTime = 0
# Explicit dtype: an empty Series without dtype is ambiguous for pandas.
chapterTime = pd.Series(dtype=float)
for chapter in timedSectionsIndex:
    deltaTime = times.loc[chapter,"firstCompletionDuration"].total_seconds()
    chapterTime.loc[int(chapter[-2:])] = deltaTime
    # NOTE(review): if unreached chapters carry the pd.Timedelta.max
    # sentinel, they inflate completionTime - confirm this is intended.
    completionTime += deltaTime

# efficiency = (1 + #unlockedchapters)/(time * (1 + #death + #craft + #add + #equip))
actionsCount = 1 \
    + data.loc['death', columnName] \
    + data.loc['craft', columnName] \
    + data.loc['add', columnName] \
    + data.loc['equip', columnName]
data.loc['efficiency', columnName] = np.log(
    ( 1 + data.loc['maxChapter', columnName] ) / (completionTime * actionsCount)
)

playedTime = getPlayedTimeUser(userId, _rmDF = _rmDF)

data.loc['thoroughness', columnName] = \
    data.loc['craft', columnName] \
    * data.loc['pickup', columnName] \
    * ( 1 + np.power(len(playedTime['sandbox']['daysSpent']),2))

totalSpentTime = playedTime['tutorial']['totalSpentTime'] + playedTime['sandbox']['totalSpentTime']
totalSpentDays = len(playedTime['tutorial']['daysSpent'] | playedTime['sandbox']['daysSpent'])
# max(1, ...) keeps the logarithm non-negative.
data.loc['fun', columnName] = np.log(
    max(1, totalSpentTime.total_seconds() * np.power(totalSpentDays,2))
)

data.loc['completionTime', columnName] = completionTime
for chapterNumber in chapterTime.index:
    data.loc[chapterNumber,columnName] = chapterTime.loc[chapterNumber]

if len(_source) != 0:
    if hasAnswered(userId):
        # Pick one survey answer: the first one of the second temporality if
        # any, else the last one of the first temporality, else the last one.
        gformLine = gform[gform[localplayerguidkey] == userId]
        afters = gformLine[gformLine[QTemporality] == answerTemporalities[1]]
        if len(afters) > 0:
            gformLine = afters.iloc[0]
        else:
            befores = gformLine[gformLine[QTemporality] == answerTemporalities[0]]
            if len(befores) > 0:
                gformLine = befores.iloc[len(befores)-1]
            else:
                gformLine = gformLine.iloc[len(gformLine)-1]

        # add data from the gform: binary score on each question
        gformData = getBinarized(gformLine, _source = _source)
        for question in gformData.index:
            data.loc[question,columnName] = gformData.loc[question]
    else:
        print("warning: user " + userId + " has never answered the survey")

print(str(data))
In [ ]:
max((1,2))
In [ ]:
max(1,(totalSpentTime.total_seconds()* np.power(totalSpentDays,2)))
In [ ]:
data.loc['fun', columnName] = np.log(max(1,totalSpentTime.total_seconds()* np.power(totalSpentDays,2)))
In [ ]:
#testUID = "bfdfd356-5d6f-4696-a2f1-c1dc338aa64b" # sessionsCount == 4
userId = getRandomGFormGUID()
getUserDataVector(userId)
In [ ]:
sessionsCounts = getUserSessionsCounts(rmdf1522)
playersResponders = sessionsCounts[sessionsCounts['userId'].isin(getAllResponders())]
len(sessionsCounts), len(playersResponders)
In [ ]:
playersResponders
In [ ]:
testUID = playersResponders[playersResponders['counts']==2]['userId'].values[0]
answerTimestamps = gform[gform[localplayerguidkey] == testUID][QTimestamp]
In [ ]:
# For each survey responder with more than one session, print the first and
# last event time of every session next to the UTC time of each of their
# survey answers, and tell whether the survey was answered before or after
# playing that session.
import datetime
import pytz

local = pytz.timezone ("Europe/Berlin")

sample = getAllResponders()
for userId in sample:
    sessions = getAllSessionsOfUser(rmdf1522,userId)
    if len(sessions) > 1:
        print("------------------user " + userId + " ------------------")
        print()
        answerTimestamps = gform[gform[localplayerguidkey] == userId][QTimestamp]
        for sessionIndex in sessions.index:
            sessionId = sessions.loc[sessionIndex, 'sessionId']
            _logs = rmdf1522[rmdf1522['sessionId'] == sessionId]
            # Only keep the events that have a section.
            _logs = _logs[_logs.index.isin(_logs['section'].dropna().index)]
            _timedEvents = _logs['userTime']
            _timedEvents = _timedEvents.sort_values()
            print("session " + str(sessionIndex))
            if len(_timedEvents) > 0:
                # Positional access: the Series keeps the row labels of
                # rmdf1522, so [0] and [-1] would be label lookups.
                _sessionStart = _timedEvents.iloc[0]
                _sessionEnd = _timedEvents.iloc[-1]
                print("\tstart: " + str(_sessionStart))
                print("\tend: " + str(_sessionEnd))
            print()
            for answerTimestampIndex in answerTimestamps.index:
                survey = answerTimestamps.loc[answerTimestampIndex]
                utc_dt = survey.astimezone (pytz.utc)
                print("\tsurvey" + str(answerTimestampIndex))
                print("\t" + str(utc_dt))
                if len(_timedEvents) > 0:
                    if (_sessionStart > utc_dt) and (_sessionEnd > utc_dt):
                        print("\tanswered before playing")
                    elif (_sessionStart < utc_dt) and (_sessionEnd < utc_dt):
                        print("\tanswered after playing")
                    else:
                        print("\tundefined: overlap")
                        print("\t" + str((_sessionStart > utc_dt, _sessionEnd > utc_dt)))
                else:
                    print("\tundefined: no event")
                print()
            print()
        print()
        print()
        print()
In [ ]:
_logs = rmdf1522[rmdf1522['sessionId'] == sessionId][['type', 'userTime', 'section']].values[0]
_logs
In [ ]:
# Positional access: _timedEvents keeps the original row labels, so [0] and
# [-1] would be looked up as labels rather than first/last element.
_timedEvents.iloc[0], _timedEvents.iloc[-1], survey
In [ ]:
# Positional access: _timedEvents keeps the original row labels, so [0] and
# [-1] would be looked up as labels rather than first/last element.
survey < _timedEvents.iloc[0], survey < _timedEvents.iloc[-1]
In [ ]:
# Positional access: _timedEvents keeps the original row labels, so [0] and
# [-1] would be looked up as labels rather than first/last element.
str((_timedEvents.iloc[0] < survey, _timedEvents.iloc[-1] > survey))
In [ ]:
times
In [ ]:
eventName
In [ ]:
getUserSectionsEvents( 'start', userId, sessionsList )
In [ ]:
perSession = perSession[perSession['section'].str.startswith('tutorial', na=False)]